#!/usr/bin/env ruby

require 'cgi'
require 'open-uri'
require 'rubygems'
require 'hpricot'
require 'mechanize'

# USAGE:
#   ruby ./getCutSite.rb EcoRI BamHI
#   ruby ./getCutSite.rb EcoR1


# For every enzyme name given on the command line, query REBASE and print the
# enzyme name(s) together with the cut sequence(s) found on the result page.
#
# Two page layouts are handled:
#   * a results table (bgcolor='beige'): one output line per row whose third
#     column holds a cut sequence other than "-";
#   * anything else (probably the enzyme's own page): the first matching
#     <font color=''> element is printed next to the name as typed.
ARGV.each do |enzyme|
  # Create mechanization agent (a new one for each enzyme).
  # NOTE(review): later mechanize releases expose this class as ::Mechanize;
  # confirm against the installed gem version before renaming.
  agent = WWW::Mechanize.new

  # Rebase discourages automated scraping...but that's not a problem, let's
  # pretend we are an "Internet Explorer 6" browser running on Windows:
  agent.user_agent_alias = 'Windows IE 6'

  # Grab page contents. The name comes straight from the command line, so it
  # is escaped before being placed in the query string; otherwise characters
  # such as spaces, '&' or '#' would corrupt the request.
  page = agent.get("http://rebase.neb.com/cgi-bin/reb_get.pl?enzname=#{CGI.escape(enzyme)}")

  # Isolate the content we want to extract using XPath-style selectors.
  # If we got a table:
  rows = page.search("//table[@bgcolor='beige']//tr")
  if rows && !rows.empty?
    # Extract info from each row:
    rows.each do |row|
      name_elem = row.search("//td[1]//font[@color='#0000BB']")
      cut_seq_elem = row.search("//td[3]//font[@size='2']")

      # Skip the records with "-" (or nothing) for cut sequence in the
      # results table.
      cut_seq = cut_seq_elem.inner_html.strip
      next if cut_seq.empty? || cut_seq == '-'

      puts "  #{name_elem.inner_html}  =>  #{cut_seq_elem.inner_html}"
    end
  else # probably went right to the enzyme's page
    elems = page.search("//center//center//td//font[@color='']")
    if elems.nil? || elems.empty?
      # Report the miss on stderr so stdout stays limited to real results.
      warn "  #{enzyme}  => no cut site found"
    else
      puts "  #{enzyme}  => #{elems.first.inner_html}"
    end
  end
end